\begin{table}[!htbp]
% Ablation table for the multiplication task: one row per model variant.
% Columns: success rate, step at which the task was solved (median and
% mean with interval), and mean sparsity error (with interval).
% [!htbp] rather than [!h]: a lone `h' is rewritten to `ht' with a warning
% and can block later floats when the table does not fit at this position.
\caption{\label{tab:function-task-static-ablation}Shows the success rate for $\mathcal{L}_{\mathbf{W}_1, \mathbf{W}_2} < \mathcal{L}_{\mathbf{W}_1^\epsilon, \mathbf{W}_2^*}$, the global step at which the model converged, and the sparsity error for all weight matrices, with 95\% confidence intervals. The dataset is the multiplication problem with default parameters.}
\centering
\begin{tabular}{rllll}
\toprule
% Header row 1: grouped headings; "Solved at" spans the Median and Mean columns.
\multicolumn{1}{c}{Model} & \multicolumn{1}{c}{Success} & \multicolumn{2}{c}{Solved at} & \multicolumn{1}{c}{Sparsity error} \\
\cmidrule(l{3pt}r{3pt}){1-1} \cmidrule(l{3pt}r{3pt}){2-2} \cmidrule(l{3pt}r{3pt}){3-4} \cmidrule(l{3pt}r{3pt}){5-5}
% Header row 2: the statistic reported in each column.
 & Rate & Median & Mean & Mean\\
\midrule
% NAC variants.
$\mathrm{NAC}_{\bullet,\sigma}$ & $100\%$ & $2.5 \cdot 10^{6}$ & $2.6 \cdot 10^{6} \pm 1.6 \cdot 10^{5}$ & $1.0 \cdot 10^{-4} \pm 4.9 \cdot 10^{-5}$\\
$\mathrm{NAC}_{\bullet}$ & $24\%$ & $2.9 \cdot 10^{6}$ & $3.0 \cdot 10^{6} \pm 6.6 \cdot 10^{5}$ & $4.0 \cdot 10^{-4} \pm 4.1 \cdot 10^{-4}$\\
% NMU and its ablations. The subscript "bias" is a label, not a product of
% variables, so it is set upright like the \mathrm{NAC} labels above.
NMU & $100\%$ & $1.4 \cdot 10^{6}$ & $1.4 \cdot 10^{6} \pm 6.7 \cdot 10^{4}$ & $4.4 \cdot 10^{-7} \pm 6.9 \cdot 10^{-8}$\\
NMU, no $\mathcal{R}_{\mathrm{bias}}$ & $100\%$ & $1.9 \cdot 10^{6}$ & $1.9 \cdot 10^{6} \pm 1.2 \cdot 10^{5}$ & $9.4 \cdot 10^{-4} \pm 2.9 \cdot 10^{-4}$\\
NMU, no $\mathcal{R}_{\mathrm{bias}}$, no W-clamp & $100\%$ & $1.5 \cdot 10^{6}$ & $1.5 \cdot 10^{6} \pm 1.0 \cdot 10^{5}$ & $2.8 \cdot 10^{-4} \pm 5.3 \cdot 10^{-5}$\\
NMU, no W-clamp & $100\%$ & $1.3 \cdot 10^{6}$ & $1.3 \cdot 10^{6} \pm 6.6 \cdot 10^{4}$ & $8.8 \cdot 10^{-5} \pm 6.2 \cdot 10^{-5}$\\
\bottomrule
\end{tabular}
\end{table}
